# @shuaige : 陈世玉
# @name :07小爬虫.py
# @time :2024/11/25 16:57
import requests
from bs4 import BeautifulSoup
import bs4
import re
import csv
import codecs
def getHtmlText(url, timeout=30):
    """Fetch *url* and return the decoded response body, or "" on failure.

    Args:
        url: page URL to download.
        timeout: seconds before the request times out (default 30,
            matching the previous hard-coded value).

    Returns:
        The response text with the encoding sniffed from the body
        (``apparent_encoding``), or an empty string when any network or
        HTTP error occurs — callers treat "" as "no page".
    """
    try:
        r = requests.get(url, timeout=timeout)
        r.raise_for_status()  # turn 4xx/5xx responses into exceptions
        # The Content-Type header often lies; trust the sniffed encoding.
        r.encoding = r.apparent_encoding
        return r.text
    except requests.RequestException:
        # Narrow catch: only request-related failures are swallowed;
        # programming errors still propagate instead of being hidden.
        return ""
def fillUnivList(ulist, html):
    """Parse the ranking table in *html*, appending rows to *ulist*.

    Each appended row is ``[rank, university_name, total_score]`` (all
    strings). Rows that do not match the expected cell layout are skipped.

    Args:
        ulist: list to extend in place.
        html: HTML text of the ranking page; may be "" when the
            download failed (getHtmlText returns "" on error).
    """
    soup = BeautifulSoup(html, "html.parser")
    tbody = soup.find('tbody')
    if tbody is None:
        # Empty/failed download or a changed page layout: nothing to parse.
        # The original code raised AttributeError here on an empty html.
        return
    for tr in tbody.children:
        if not isinstance(tr, bs4.element.Tag):
            continue  # skip whitespace NavigableStrings between rows
        tds = tr('td')
        try:
            paiming = re.findall(r"\d+", tds[0].div.string)[0]
            daxue = tds[1].find('span', class_='name-cn').get_text(strip=True)
            zongfen = re.findall(r"\d+", tds[3].string)[0]
        except (AttributeError, IndexError, TypeError):
            # Row lacks the expected cells/spans — skip rather than crash.
            continue
        ulist.append([paiming, daxue, zongfen])
def printUnivList(ulist, num):
    """Pretty-print up to *num* rows of *ulist* as an aligned table.

    chr(12288) is the fullwidth (CJK) space, used as the fill character
    so Chinese school names align in a monospaced terminal.

    Args:
        ulist: list of [rank, name, score] rows.
        num: maximum number of rows to print; clamped to len(ulist), so
            a short (or empty, e.g. failed-fetch) list no longer raises
            IndexError.
    """
    tplt = "{0:^10}\t{1:{3}^10}\t{2:^10}"
    print(tplt.format("排名", "学校名称", "总分", chr(12288)))
    # Slicing clamps automatically: ulist[:num] is safe for any num >= 0.
    for rank, name, score in ulist[:num]:
        print(tplt.format(rank, name, score, chr(12288)))

def save_csv(filename, uinfo):
    """Write ranking rows to *filename* as a UTF-8 CSV with a header row.

    Args:
        filename: output CSV path (overwritten if it exists).
        uinfo: iterable of [rank, name, score] rows.
    """
    # newline='' is required by the csv module docs; without it, Windows
    # output gets an extra blank line between every row.
    with open(filename, 'w', encoding='utf-8', newline='') as f:
        writer = csv.writer(f)
        writer.writerow(['排名', '学校名称', '总分'])
        writer.writerows(uinfo)
def main():
    """Crawl the 2020 BCUR ranking page, print the top 20, save top20.csv."""
    uinfo = []
    url = 'https://www.shanghairanking.cn/rankings/bcur/2020'
    html = getHtmlText(url)
    fillUnivList(uinfo, html)
    printUnivList(uinfo, 20)
    save_csv('top20.csv', uinfo)
    print('爬取完毕')


# Guard the entry point so importing this module does not trigger a
# network crawl; behavior when run as a script is unchanged.
if __name__ == "__main__":
    main()


